## Project: Autocracy UPR project ----------------------------------------------
##        02 Controls             ----------------------------------------------
## Author:
##   - Chunyoung Park (UGA)
##   - Sanghoon Park (UofSC)
##
## Updated: Aug 28 2024
if (!require("devtools")) install.packages("devtools")
if (!require("pacman")) install.packages("pacman")
if (!require("rqog")) remotes::install_github("ropengov/rqog")
pacman::p_load(rqog, rio, tidyverse, dendextend, splitstackshape, dlookr, psych, tidyverse)
if (!require("dendextendRcpp")) devtools::install_github('talgalili/dendextendRcpp')
if (!require("vdemdata")) devtools::install_github("vdeminstitute/vdemdata")
if (!require("ggrepel")) install.packages("ggrepel")
if (!require("patchwork")) install.packages("patchwork")
if (!require("futurevisions")) devtools::install_github("JoeyStanley/futurevisions")

## theme_set
# Register the Google font used by every figure in the project.
dlookr::import_google_font("Barlow Semi Condensed")

# Project-wide ggplot2 theme: theme_minimal() set in Barlow Semi Condensed,
# with black text everywhere, no minor grid lines, bold left-aligned facet
# labels on a white strip, a left-aligned caption and the legend at the bottom.
# Takes no arguments and returns a ggplot2 theme object.
theme_clean <- function() {
  # Every text element is black; only the remaining settings vary.
  black_text <- function(...) element_text(color = "black", ...)

  overrides <- theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    plot.title = black_text(face = "bold"),
    plot.subtitle = black_text(face = "bold"),
    plot.caption = black_text(hjust = 0),
    axis.title = black_text(family = "Barlow Semi Condensed Medium"),
    axis.text = black_text(),
    strip.text = black_text(
      family = "Barlow Semi Condensed",
      face = "bold",
      size = rel(1),
      hjust = 0
    ),
    strip.background = element_rect(fill = "white", color = NA)
  )

  theme_minimal(base_family = "Barlow Semi Condensed") + overrides
}

# Make theme_clean() the default theme for all subsequent plots.
ggplot2::theme_set(theme_clean())

## Load manipulated data -------------------------------------------------------
draft_data <- readRDS("data/data for analysis/draft_data.RDS")

## Dependent Variable Manipulation ---------------------------------------------
### The DV follows Terman and Voeten (2018): the severity of a recommendation,
### i.e. how lenient or severe its content is, judged by the kind of action
### demanded of the State under Review (SuR).
### Quick look at the raw action categories before recoding.
table(draft_data$action_category)
### Action categories are collapsed into a three-point ordinal scale
### (see the online appendix of the paper):
###
### | Action Code | Severity Code |
### |-------------|---------------|
### | 1           | 1             |
### | 2           | 1             |
### | 3           | 3             |
### | 4           | 2             |
### | 5           | 3             |
###
### 1 = Not shaming / 2 = Encourage; engage with / 3 = Shaming
names(draft_data)

analysis_data_upr <- draft_data |>
  mutate(
    # Any category outside the five listed labels (including NA) stays NA.
    severity = case_when(
      action_category %in% c("1 - Minimal action",
                             "2 - Continuing action") ~ 1L,
      action_category %in% "4 - General action" ~ 2L,
      action_category %in% c("3 - Considering action",
                             "5 - Specific action") ~ 3L,
      TRUE ~ NA_integer_
    ),
    # 1 when the response is exactly "Supported"; everything else,
    # including a missing response, is coded 0.
    response_bi = as.integer(response %in% "Supported")
  ) |>
  # Keep the identifiers and recoded outcomes; the two COW code columns are
  # renamed to to_cow (state under review) and from_cow (recommending state).
  dplyr::select(
    no, year, month, cycle_nu,
    to_cow = under_revew_state_cow,
    from_cow = recomm_state_cow,
    severity, response_bi, issues,
    reviewer_revieweed
  )

## Explanatory Variables Manipulation ------------------------------------------
### EV1: Geopolitical Affinity -------------------------------------------------
###      Absolute distance between country ideal points estimated using votes
###      in the United Nations General Assembly, multiplied by minus one to
###      transform it into a measure of affinity: higher values indicate
###      higher levels of ideological convergence on global issues.

# Loading this file is what brings `dfAgree` into the workspace.
load("data/controls/raw/AgreementScores.Rdata")
dfAgree <- janitor::clean_names(dfAgree)

##======= Necessary Var Description =======##
## session_x (kept as `session`): UN session (1-75)
## ccode1 and ccode2: COW country codes of the two dyad members
## year: year of vote
## ideal_point_all_x: ideal point estimate, presumably for ccode1
## ideal_point_all_y: ideal point estimate, presumably for ccode2
##   (confirm the x/y <-> ccode1/ccode2 mapping against the data codebook)
## The absolute ideal point distance is not stored here; it can be computed
##   later as abs(ideal_point_all_x - ideal_point_all_y).

geopol.affinity <- dplyr::select(
  dfAgree,
  session = session_x,
  ccode1,
  ccode2,
  year,
  ideal_point_all_x,
  ideal_point_all_y
)

### EV2: Alliance from Correlates of War Formal Alliance v. 4.1. ---------------
###       whether or not a formal alliance exists between reviewer and target
###       See the codebook document for descriptions of variables
###       download.file(url = 
###                    "https://correlatesofwar.org/data-sets/formal-alliances/alliances-data-dta-zip/at_download/file",
###                     destfile = "Data/Raw data/State/alliance.zip", mode = "wb")
###       unzip("Data/Raw data/State/alliance.zip", exdir ="Data/Raw data/State")

# Dyad-year alliance indicator: 1 when the dyad has a defense, neutrality or
# nonaggression commitment in that year, 0 otherwise (NA if undetermined).
alliance <- rio::import("data/controls/raw/alliance_v4.1_by_dyad_yearly.dta") |>
  dplyr::select(
    ccode1, ccode2, year,
    dyad_st_year, dyad_end_year,
    defense, neutrality, nonaggression
  ) |>
  mutate(
    alliance = if_else(
      defense == 1L | neutrality == 1L | nonaggression == 1L,
      1L,
      0L
    ),
    # Manual override: the 652-660 dyad is forced to allied in 2010.
    # NOTE(review): presumably Syria (652) and Lebanon (660) - confirm the
    # codes and the source for this correction.
    alliance = case_when(
      ccode1 == 652 & ccode2 == 660 & year == 2010 ~ 1L,
      TRUE ~ alliance
    )
  ) |>
  # Only the dyad identifiers, year and the combined indicator are kept.
  dplyr::select(ccode1, ccode2, year, alliance)

### EV3: Relative Economic Size measured by GDPpc ------------------------------
###   When we merge this to the dyadic data, we need to manipulate this variable
###   for directed dyadic variable (e.g., Recommender GDPpc - Receiver GDPpc)

# Quality of Government standard time-series data, read from a local copy.
qog <- read_csv("data/controls/raw/qog_std_ts_jan23.csv")
# Alternative: fetch the same data set through the rqog package.
# qog <- rqog::read_qog(which_data = "standard",
#                       year = 2023,
#                       data_type = "time-series")

# Keep only the COW code, the year and the GDP per capita series.
qog_sub <- dplyr::select(qog, ccodecow, year, wdi_gdpcapcon2015)

## Control Variables Manipulation ----------------------------------------------
### Ctrl1: Issue thematic involved in each recommendation. ---------------------
###       -> Automatic clustering algorithm to aggregate them into 8 more 
###          manageable categories:
### Terman and Voeten (2018) aggregate the Issues into 8 categories
###    (1) Women, Children & Trafficking, 
###    (2) Physical Integrity Rights (including the death penalty) 
###    (3) Justice, (4) Speech & Political Participation, 
###    (5) Race, Ethnic, & Religious Discrimination, (6) Migration, 
###    (7) Socio-Economic Rights, and (8) Vulnerable Populations.
### noted-biased / supported-biased; 
#### Split the Issue variable into one dummy column per theme -------------------
#### Borrow Terman and Voeten (2018)'s code

# One row per recommendation: a row id plus the raw "|"-separated issue string.
issue_pot <- analysis_data_upr |>
  rowid_to_column() |>
  dplyr::select(rowid, issues)

# Expand the issue string into one indicator column per distinct issue
# (prefixed "issues_"); absent issues are filled with 0 and the original
# string column is dropped.
issue_dummies <- cSplit_e(
  issue_pot,
  split.col = "issues",
  sep = "|",
  type = "character",
  fill = 0,
  drop = TRUE
)

# Strip the "issues_" prefix and normalise the column names to snake_case.
renamed_issue <- issue_dummies |>
  rename_with(\(nm) str_replace_all(nm, "issues_", "")) |>
  janitor::clean_names()

# keep just themes columns
#data <- recs[,c(9:62)] # should be 54 obs

#### Drop themes treated as irrelevant, based on Terman and Voeten (2018) -------
#### The second vector lists the themes additionally dropped for the 3rd round.
#### any_of() silently skips a theme that is not present in the data, matching
#### the behaviour of assigning NULL to a column that does not exist.
data <- renamed_issue |>
  dplyr::select(
    -any_of(c(
      "international_instruments",
      "general",
      "international_humanitarian_law",
      "national_human_rights_institution",
      "special_procedures",
      "upr_process",
      "technical_assistance_and_cooperation",
      "treaty_bodies",
      "national_action_plan",
      "other",
      "human_rights_education_and_training",
      "cp_rights_general",
      "esc_rights_general"
    )),
    -any_of(c(
      "privacy",
      "freedom_of_assembly",
      "sd_gs",
      "rights_for_older_people",
      "right_to_life",
      "transitional_justice",
      "statelessness_and_the_right_to_nationality"
    ))
  )

#### Assign Groups for Issues --------------------------------------------------

# Identifier columns for each recommendation, keyed by the same row id that
# was attached to the issue dummies.
id_pack <- analysis_data_upr |>
  dplyr::select(no, year, month, cycle_nu, to_cow, from_cow) |>
  rowid_to_column()

# Collapse the individual theme dummies into the eight issue categories of
# Terman and Voeten (2018). A category is 1 when at least one of its member
# themes is flagged for the recommendation and 0 otherwise.
issues <- data |> mutate(
  migrantscat = case_when( # Migration
    asylum_seekers_refugees == 1L ~ 1L,
    internally_displaced_persons == 1L ~ 1L,
    labour_rights == 1L ~ 1L,
    migrants == 1L ~ 1L,
    freedom_of_movement == 1L ~ 1L,
    TRUE ~ 0L),
  socioecon = case_when( # SocioEconomic Rights
    environment_and_human_rights == 1L ~ 1L,
    business_and_human_rights == 1L ~ 1L,
    right_to_land == 1L ~ 1L,
    right_to_development == 1L ~ 1L,
    poverty == 1L ~ 1L,
    right_to_education == 1L ~ 1L,
    right_to_food == 1L ~ 1L,
    right_to_health == 1L ~ 1L,
    right_to_housing == 1L ~ 1L,
    right_to_water_and_sanitation == 1L ~ 1L,
    TRUE ~ 0L),
  vulnerable = case_when( # Vulnerable Populations
    disability_rights == 1L ~ 1L,
    hiv_aids == 1L ~ 1L,
    sexual_orientation_and_gender_identity == 1L ~ 1L,
    youth_rights == 1L ~ 1L,
    TRUE ~ 0L),
  # NOTE: this overwrites the raw `discrimination` theme column in place, so
  # it is NOT appended after the other new columns.
  discrimination = case_when(# Race, Ethnic & Religious Discrimination
    indigenous_peoples == 1L ~ 1L,
    freedom_of_religion_and_belief == 1L ~ 1L,
    minority_rights == 1L ~ 1L,
    racial_discrimination == 1L ~ 1L,
    discrimination == 1L ~ 1L,
    violent_extremism_and_human_rights == 1L ~ 1L,
    TRUE ~ 0L),
  womenchild = case_when( # Women, Children & Trafficking
    trafficking == 1L ~ 1L,
    gender_based_violence == 1L ~ 1L,
    sexual_and_reproductive_rights == 1L ~ 1L,
    rights_of_the_child == 1L ~ 1L,
    womens_rights == 1L ~ 1L,
    TRUE ~ 0L),
  physint = case_when( # Physical Integrity Rights
    detention == 1L ~ 1L,
    enforced_disappearances == 1L ~ 1L,
    extrajudicial_executions == 1L ~ 1L,
    human_rights_violations_by_state_agents == 1L ~ 1L,
    torture_and_other_cid_treatment == 1L ~ 1L,
    impunity == 1L ~ 1L,
    death_penalty == 1L ~ 1L,
    TRUE ~ 0L),
  justicecat = case_when( # Justice
    corruption == 1L ~ 1L,
    human_rights_and_terrorism == 1L ~ 1L,
    public_security == 1L ~ 1L,
    justice == 1L ~ 1L,
    access_to_justice == 1L ~ 1L,
    TRUE ~ 0L),
  political = case_when( # Speech & Political Participation
    freedom_of_association_and_peaceful_assembly == 1L ~ 1L,
    freedom_of_opinion_and_expression == 1L ~ 1L,
    freedom_of_the_press == 1L ~ 1L,
    human_rights_defenders == 1L ~ 1L,
    civil_society == 1L ~ 1L,
    elections == 1L ~ 1L,
    TRUE ~ 0L)
) |>
  # Select the row id and the eight categories BY NAME. The previous
  # positional selection (1, 49:56) assumed eight appended columns, but only
  # seven are appended because `discrimination` replaces an existing column;
  # positions 49:56 could therefore pick up an unrelated raw theme and miss
  # the composite discrimination category.
  dplyr::select(
    rowid,
    migrantscat, socioecon, vulnerable, discrimination,
    womenchild, physint, justicecat, political
  )

# Attach the issue categories to the recommendation identifiers.
issue_df <- left_join(id_pack, issues, by = "rowid")

### Ctrl2: Democracy from V-Dem Project ----------------------------------------
###       -> Electoral Democracy Indicators

vdem <- vdemdata::vdem

#### Democracy clarification ---------------------------------------------------
# Keep the regime-related indicators and derive several binary
# "Democracy"/"Autocracy" classifications from them. In every case_when()
# below, observations that match no condition (including missing values of
# the underlying indicator) are left as NA.
vdem_sample <- vdem |>
  dplyr::select(
    COWcode, year, v2x_polyarchy, v2x_regime,
    e_lexical_index, e_polity2, e_regionpol_6C,
    e_boix_regime, v2xps_party
  ) |>
  mutate(
    # Electoral democracy index split at 0.5.
    regime_poly = case_when(
      v2x_polyarchy < 0.5 ~ "Autocracy",
      v2x_polyarchy >= 0.5 ~ "Democracy"
    ),
    # e_boix_regime dichotomy; NA propagates through if_else().
    regime_brm = if_else(e_boix_regime == 1L, "Democracy", "Autocracy"),
    # Lexical index: democracy only at the top score of 6.
    regime_lexi = case_when(
      e_lexical_index == 6L ~ "Democracy",
      e_lexical_index >= 0 & e_lexical_index < 6 ~ "Autocracy"
    ),
    # Lexical index with a lower threshold: democracy above 4.
    regime_lexi5 = case_when(
      e_lexical_index > 4L ~ "Democracy",
      e_lexical_index >= 0L & e_lexical_index < 5L ~ "Autocracy"
    ),
    # Lexical index with the lowest threshold: democracy above 3.
    regime_lexi4 = case_when(
      e_lexical_index > 3L ~ "Democracy",
      e_lexical_index >= 0L & e_lexical_index < 4L ~ "Autocracy"
    ),
    # Polity2: below -5 is autocracy, 6 and above is democracy; scores from
    # -5 up to (but not including) 6 match neither rule and stay NA.
    regime_pol4 = case_when(
      e_polity2 < -5 ~ "Autocracy",
      e_polity2 >= 6 ~ "Democracy"
    ),
    # v2x_regime: categories 0-1 are autocracies, 2-3 are democracies.
    regime_vdem = case_when(
      v2x_regime %in% c(0, 1) ~ "Autocracy",
      v2x_regime %in% c(2, 3) ~ "Democracy"
    )
  )



### Ctrl3: fixed reviewer and target country effects ---------------------------
###       -> set factor controlled

### Ctrl4: Whether the reviewer country was undergoing a review in the same ----
###        year as the target (Reviewer UPR).

### Ctrl5: Whether the target and reviewer countries come from -----------------
###        the same region (Region), e_regionpol_6C

### Ctrl6: Human Rights Committee Membership ------------------------------------

# Membership records: every listed row is flagged as a member
# (membership = 1) and the country name is converted to a numeric COW code.
hrc_membership <- readxl::read_xlsx("data/controls/Membership1977_2022.xlsx") |>
  mutate(
    membership = 1L,
    ccode = countrycode::countrycode(
      country,
      origin = "country.name",
      destination = "cown"
    )
  )


### Bilateral trade from IMF DOT (Direction of Trade Statistics) -----------------

# Read the raw trade file and keep five columns by position (1, 3, 5, 6, 8);
# the rename below shows these are the two country names, the time period
# and the export / import values.
bilateral <- read_csv("data/controls/raw/IMF_DOT.csv")
bilateral <- dplyr::select(bilateral, 1, 3, 5, 6, 8)

# Replace the verbose source headers with short working names.
bilateral_cleaned <- rename(
  bilateral,
  countryA = "Country Name",
  countryB = "Counterpart Country Name",
  year = "Time Period",
  export = "Goods, Value of Exports, Free on board (FOB), US Dollars (TXG_FOB_USD)",
  import = "Goods, Value of Imports, Free on board (FOB), US Dollars (TMG_FOB_USD)"
)

# Tag each year with a UPR cycle number: 0 before 2008, 1 for 2008-2011,
# 2 for 2012-2016 and 3 from 2017 onward.
# The cycle-3 rule previously read `year > 2017`, which left 2017 matching no
# rule (NA) because cycle 2 stops at `year < 2017`; the boundary is now
# contiguous.
# NOTE(review): these boundaries appear to follow the official UPR cycle
# calendar - confirm, since the lower-case `cycle_nu` built later in this
# script uses different (four-year) windows.
bilateral_cleaned <- bilateral_cleaned |>
  mutate(
    Cycle_nu =
      case_when(
        year < 2008 ~ 0L,
        year > 2007 & year < 2012 ~ 1L,
        year > 2011 & year < 2017 ~ 2L,
        year > 2016 ~ 3L,
        TRUE ~ NA_integer_)
  )

# Convert both country names to numeric COW codes.
bilateral_cleaned <- mutate(
  bilateral_cleaned,
  countryAcode = countrycode::countrycode(
    sourcevar = countryA,
    origin = "country.name",
    destination = "cown"
  ),
  countryBcode = countrycode::countrycode(
    sourcevar = countryB,
    origin = "country.name",
    destination = "cown"
  )
)

# Export values per country A / country B / year.
export <- dplyr::select(
  bilateral_cleaned,
  countryA, countryAcode, countryB, countryBcode, year, Cycle_nu, export
)
# Keep one ordering of each dyad (A's code greater than B's). filter() only
# keeps rows where the comparison is TRUE, so rows with a missing code on
# either side are removed here as well.
export_undirec <- dplyr::filter(export, countryAcode > countryBcode)

# Import values, prepared in exactly the same way.
import <- dplyr::select(
  bilateral_cleaned,
  countryA, countryAcode, countryB, countryBcode, year, Cycle_nu, import
)
import_undirec <- dplyr::filter(import, countryAcode > countryBcode)

# Combine exports and imports for each dyad-year, keeping rows that appear
# in only one of the two tables.
bilateral_undirec <- full_join(
  export_undirec,
  import_undirec,
  by = c("countryA", "countryAcode", "countryB", "countryBcode", "year",
         "Cycle_nu")
)

# Build the trade controls for each dyad:
#   total_trade / lntrade      - exports + imports per row, and log(x + 1)
#   mean_trade / lnmeantrade   - mean of total_trade within the dyad's
#                                four-year window (cycle_nu), and log(x + 1)
#   lagmeantrade               - lnmeantrade of the preceding window
#   lagtrade1 / lagtrade3      - lntrade lagged by 1 / 3 rows ordered by year
#
# Two defects are fixed relative to the earlier version:
#  * total_trade was computed with sum(export, import) inside the grouped
#    mutate, which collapses to a single dyad-window total. mean_trade was
#    therefore always identical to total_trade and the year lags operated on
#    window totals. It is now computed row-wise before grouping.
#  * lagmeantrade used lag(n = 1) ordered by cycle_nu on multi-row windows,
#    so most rows received their own window's value. It now looks up the
#    value of window (cycle_nu - 1) and is NA when that window is absent.
# NOTE(review): this assumes one row per dyad-year and that per-year totals
# were intended, as the variable names suggest - confirm.
bilateral_undirec_cleaned <- bilateral_undirec |>
  mutate(
    cycle_nu = case_when(
      year > 2003 & year < 2008 ~ 0L,
      year > 2007 & year < 2012 ~ 1L,
      year > 2011 & year < 2016 ~ 2L,
      year > 2015 & year < 2020 ~ 3L,
      TRUE ~ NA_integer_
    ),
    # Missing flows count as zero, so a row with both values missing gets 0
    # (same treatment of NA as before).
    total_trade = rowSums(cbind(export, import), na.rm = TRUE),
    lntrade = log(total_trade + 1)
  ) |>
  # Years outside 2004-2019 have no window and are dropped.
  drop_na(cycle_nu) |>
  group_by(countryAcode, countryBcode, cycle_nu) |>
  mutate(
    mean_trade = mean(total_trade, na.rm = TRUE),
    lnmeantrade = log(mean_trade + 1)
  ) |>
  group_by(countryAcode, countryBcode) |>
  mutate(
    # match() returns the first row of the previous window within the dyad;
    # lnmeantrade is constant within a window, so any row of it will do.
    lagmeantrade = lnmeantrade[match(cycle_nu - 1L, cycle_nu)],
    # Row-based lags: gaps in a dyad's years are not accounted for.
    lagtrade1 = dplyr::lag(lntrade, n = 1, order_by = year),
    lagtrade3 = dplyr::lag(lntrade, n = 3, order_by = year)
  ) |>
  ungroup()

### Fariss et al. Human Rights Protection Scores -----------------------------------------------------

# Human rights protection scores, with column names normalised to snake_case.
hrs <- janitor::clean_names(
  read_csv("data/controls/raw/HumanRightsProtectionScores_v4.01.csv")
)

# Keep year, the COW code (renamed to_cow) and theta_mean (renamed hrs).
hrs_sub <- dplyr::select(hrs, year, to_cow = cow, hrs = theta_mean)

### Save Controls as Rdata -------------------------------------------------------

# Recoded recommendation-level data, in both RDS and Stata formats.
saveRDS(analysis_data_upr, "data/data for analysis/analysis_data_upr.RDS")
rio::export(analysis_data_upr, "data/data for analysis/analysis_data_upr.dta")

# All control data sets bundled into a single .rdata file.
save(geopol.affinity, alliance, qog_sub, issue_df,
     vdem_sample, hrc_membership, bilateral_undirec_cleaned, hrs_sub,
     file = "data/controls/controls.rdata")

### You should consider the changed names when you use dta version files:
### clean_names() rewrites column names before each Stata export.
### The alliance file previously pointed at "data/controls/dta/dta/", a
### duplicated directory segment; it now goes to the same folder as the
### other control files.
rio::export(geopol.affinity |> janitor::clean_names(), "data/controls/dta/ctrl_geopolaffinity.dta")
rio::export(alliance |> janitor::clean_names(), "data/controls/dta/ctrl_alliance.dta")
rio::export(qog_sub |> janitor::clean_names(), "data/controls/dta/ctrl_qog_sub.dta")
rio::export(issue_df |> janitor::clean_names(), "data/controls/dta/ctrl_issue_df.dta")
rio::export(vdem_sample |> janitor::clean_names(), "data/controls/dta/ctrl_vdem_sample.dta")
rio::export(hrc_membership |> janitor::clean_names(), "data/controls/dta/ctrl_hrc_membership.dta")
rio::export(bilateral_undirec_cleaned |> janitor::clean_names(), "data/controls/dta/ctrl_bilateral_undirec_cleaned.dta")
rio::export(hrs_sub |> janitor::clean_names(), "data/controls/dta/ctrl_hrs_sub.dta")

